home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
C/C++ Users Group Library 1996 July
/
C-C++ Users Group Library July 1996.iso
/
vol_400
/
422_02
/
misc
/
hftext.c
< prev
next >
Wrap
C/C++ Source or Header
|
1994-03-20
|
3KB
|
130 lines
/*
* This is a **very** simple text compression program which performs
* a "Huffman" encoding of the most common characters in the input file.
* It is designed to operate as a "unix" style filter, accepting input
* from "stdin" and writing to "stdout".
*
* Syntax: HFTEXT (Encode | Decode) <input_file >output_file
*
* First, the input file is scanned, and a table built up containing
* the most common characters (highest frequency at the beginning of
* the table). Then the file is re-read, and any characters which are
* in the table are replaced with a series of one bits equal in number
* to its position in the table, followed by a zero bit. Thus, the
* highest frequency character is encoded into two bits, the second
* highest is encoded into three bits etc...
*
* Characters not occurring in the table are written with a zero bit,
* followed by the 7 bits of the ASCII character value (least significant
* bit first).
*
* Note that this scheme works only on ASCII text files, and becomes *very*
* confused if the original file contains characters with the high bit set.
*
* Compile command: cc hftext -fop
*/
#include <stdio.h>
#include <file.h>
#define TSIZE 7 /* Size of common character table */
/*
 * ftable - occurrence count for each character value; filled during the
 *          encoder's first pass over the input, then consumed (entries are
 *          zeroed) while the common character table is built.
 * ocount - number of bits currently held in 'obyte': bits accumulated so
 *          far by write_bit(), or bits not yet consumed by read_bit().
 *
 * NOTE(review): a scalar "= 0" initializer on an array is not ISO C;
 * presumably it is accepted by the MICRO-C compiler this file targets.
 */
unsigned ftable[256] = 0, ocount = 0;
/*
 * ctable - the TSIZE most frequent characters, most frequent first. It is
 *          written at the start of the encoded output and read back from
 *          the start of the input when decoding.
 * obyte  - one byte bit buffer shared by write_bit() and read_bit().
 */
unsigned char ctable[TSIZE] = 0, obyte = 0;
main(argc, argv)
int argc;
char *argv[];
{
int i, j, k;
stdin = setbuf(stdin, 1000);
stdout = setbuf(stdout, 1000);
/* Use MICRO-C's more powerful '&&' to force a zero if !enough args */
switch((argc > 1) && toupper(*argv[1])) {
case 'E' : /* Encode the file */
*(char*)stdout |= F_BINARY; /* Convert stdout to BINARY */
while((i = getc(stdin)) != EOF)
++ftable[i];
rewind(stdin);
/* Build table of most frequent characters */
for(i=0; i < TSIZE; ++i) {
k = 0;
for(j=1; j < 256; ++j)
if(ftable[j] > ftable[k])
k = j;
ctable[i] = k;
ftable[k] = 0; }
/* Write the index table */
fwrite(ctable, TSIZE, stdout);
/* Process the file */
while((i = getc(stdin)) != EOF) {
for(j=0; j < TSIZE; ++j) {
if(ctable[j] == i)
break; }
if(j < TSIZE) { /* Write a token */
do
write_bit(1);
while(j--);
write_bit(0); }
else { /* Write the character */
write_bit(0);
for(k=0; k < 7; ++k) {
write_bit(i & 0x01);
i >>= 1; } } }
/* Clean up output bits */
while(obyte)
write_bit(0);
break;
case 'D' : /* Decode the file */
*(char*)stdin |= F_BINARY; /* Convert stdin to BINARY */
fread(ctable, TSIZE, stdin);
while((i = read_bit()) != EOF) {
j = 0;
if(i) { /* token */
while((k = read_bit()) && (k != EOF))
++j;
j = ctable[j]; }
else { /* Normal character */
for(k=0; k < 7; ++k)
j = (j >> 1) | read_bit();
j >>= 1; }
putc(j, stdout); }
break;
default:
abort("Use: HFTEXT E|D <input_file >output_file"); }
fflush(stdout);
}
/*
 * Write a single bit to the output file.
 *
 * The bit is shifted into the low end of 'obyte'; once eight bits have
 * been collected the byte is written to stdout and the buffer is reset,
 * so the first bit written ends up as the most significant bit.
 */
write_bit(value)
	int value;
{
	obyte <<= 1;
	obyte |= value;
	++ocount;
	if(ocount >= 8) {
		putc(obyte, stdout);
		obyte = 0;
		ocount = 0; }
}
/*
 * Read a single bit from the input file.
 *
 * Bits are taken from 'obyte' most significant bit first; a new byte is
 * fetched from stdin whenever the buffer is empty.
 *
 * Returns 0x80 if the bit is set, 0 if it is clear, or EOF when the
 * buffer is empty and stdin has no more bytes.
 */
read_bit()
{
	int c;

	if(!ocount) {
		/*
		 * Compare the int returned by getc() against EOF BEFORE it is
		 * narrowed into the unsigned char buffer; once narrowed, the
		 * value can no longer be told apart from EOF reliably.
		 */
		if((c = getc(stdin)) == EOF)
			return EOF;
		obyte = c;
		ocount = 8; }
	c = obyte & 0x80;
	obyte <<= 1;
	--ocount;
	return c;
}